Seminario Internacional de Matemática y Estadística
Colegio de Matemáticos del Perú, Region Cusco
2023-10-11
\[ECL = PD \times LGD \times EAD\]
\(PD\): Probabilidad de Impago (Probability of Default)
\(LGD\): Pérdida dado el Impago (Loss Given Default)
\(EAD\): Exposición al Impago (Exposure At Default)
CONTEXTO
LGD es la magnitud de la pérdida si el incumplimiento realmente ocurriese.
El modelo LGD es desarrollado usando los valores de las variables al momento de que el cliente Impaga.
Número de Variables: 39
Número de Obsevaciones: 1101
lgd |>
head(30) |>
kable()|>
row_spec(seq(1,30,2), background="cyan") %>%
kable_styling(full_width=FALSE)| loanid | property | occupancy | purpose | documentation | ratetype | isnegam | lienposition | originalappraisalamount | ltv | jrltv | combinedltv | originalamount | srbalance | originalterm | amorterm | fico | rate | originationdate | state | asofdate | loanage | reportedloanstatus | currentamount | modified | priorchargeoff | liquidationdate | defdate | lgd | yrm | origym | region_code | updatedcltv | updatedcltv_missing | fico_missing | singlehome | occupied | timetoliquidation | predlgd |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 140289 | S | S | P | C1 | FALSE | False | 1 | 27870.00 | 100.00000 | 0 | 100.00000 | 27870.00 | 0 | 361 | 361 | 0 | 7.500 | 199404 | PR | 201401 | 237 | 3 | 21084.86 | False | 0.00 | 201409 | 201310 | 0.000000 | 2014m9 | 1994m4 | PR | 49.658230 | 0 | 1 | 1 | 0 | 11 | 14.29697 |
| 140461 | S | S | P | C1 | FALSE | False | 1 | 44500.00 | 87.19101 | 0 | 87.19101 | 38800.00 | 0 | 361 | 361 | 635 | 9.000 | 199405 | PR | 201509 | 256 | 3 | 25270.20 | False | 0.00 | 201703 | 201310 | 0.000000 | 2017m3 | 1994m5 | PR | 37.792470 | 0 | 0 | 1 | 0 | 41 | 19.98906 |
| 140377 | S | P | P | C1 | FALSE | False | 1 | 47000.00 | 96.80851 | 0 | 96.80851 | 45500.00 | 0 | 361 | 361 | 715 | 9.000 | 199405 | PR | 201707 | 278 | 3 | 26865.95 | False | 0.00 | 201805 | 201502 | 0.000000 | 2018m5 | 1994m5 | PR | 38.588800 | 0 | 0 | 1 | 1 | 39 | 15.68278 |
| 452530 | S | P | P | C1 | FALSE | False | 1 | 110000.00 | 85.65727 | 0 | 85.65727 | 94223.00 | 0 | 361 | 361 | 560 | 7.500 | 199704 | PR | 201408 | 208 | 3 | 81577.50 | False | 0.00 | 201506 | 201310 | 0.000000 | 2015m6 | 1997m4 | PR | 52.316940 | 0 | 0 | 1 | 1 | 20 | 14.09675 |
| 142080 | S | S | C | C1 | FALSE | False | 1 | 550000.00 | 81.81818 | 0 | 81.81818 | 450000.00 | 0 | 367 | 367 | 688 | 5.500 | 199712 | PR | 201703 | 231 | 3 | 322382.25 | False | 0.00 | 201708 | 201311 | 45.716537 | 2017m8 | 1997m12 | PR | 46.056330 | 0 | 0 | 1 | 0 | 45 | 23.94747 |
| Modified_142088 | F | S | C | C1 | FALSE | False | 1 | 76000.00 | 85.00000 | 0 | 85.00000 | 64600.00 | 0 | 364 | 363 | 560 | 8.000 | 199712 | PR | 201508 | 212 | 3 | 50290.46 | True | 0.00 | 201810 | 201310 | 40.345623 | 2018m10 | 1997m12 | PR | 51.688990 | 0 | 0 | 0 | 0 | 60 | 34.14573 |
| 452301 | S | P | R | C1 | FALSE | False | 1 | 105000.00 | 80.00000 | 0 | 80.00000 | 84000.00 | 0 | 361 | 361 | 536 | 8.000 | 199803 | PR | 201802 | 239 | 3 | 58102.89 | False | 0.00 | 201910 | 201602 | 22.551184 | 2019m10 | 1998m3 | PR | 42.686300 | 0 | 0 | 1 | 1 | 44 | 18.60558 |
| 452304 | S | P | R | C1 | FALSE | False | 1 | 51200.00 | 58.59375 | 0 | 58.59375 | 30000.00 | 0 | 361 | 361 | 585 | 7.625 | 199805 | PR | 201508 | 207 | 3 | 22865.17 | False | 0.00 | 201604 | 201310 | 0.000000 | 2016m4 | 1998m5 | PR | 35.318780 | 0 | 0 | 1 | 1 | 30 | 11.75581 |
| Modified_142540 | S | P | C | C1 | FALSE | False | 1 | 160000.00 | 106.43500 | 0 | 106.43500 | 170296.00 | 0 | 523 | 360 | 583 | 5.500 | 199807 | PR | 201712 | 233 | 3 | 157464.30 | True | -32072.29 | 201906 | 201609 | 36.493534 | 2019m6 | 1998m7 | PR | 73.551710 | 0 | 0 | 1 | 1 | 33 | 25.13443 |
| 142622 | S | P | C | C1 | FALSE | False | 1 | 255000.00 | 101.96078 | 0 | 101.96078 | 260000.00 | 0 | 403 | 403 | 0 | 7.500 | 199808 | PR | 201402 | 186 | 3 | 253480.66 | False | 0.00 | 201508 | 201310 | 17.153183 | 2015m8 | 1998m8 | PR | 76.829010 | 0 | 1 | 1 | 1 | 22 | 22.69850 |
| 452447 | S | P | C | C1 | FALSE | False | 1 | 65000.00 | 80.00000 | 0 | 80.00000 | 52000.00 | 0 | 361 | 361 | 0 | 7.250 | 199812 | PR | 201404 | 184 | 3 | 51002.35 | False | 0.00 | 201604 | 201310 | 41.178493 | 2016m4 | 1998m12 | PR | 66.013690 | 0 | 1 | 1 | 1 | 30 | 21.73015 |
| Modified_452363 | S | P | R | C1 | FALSE | False | 1 | 95694.45 | 63.75000 | 0 | 63.75000 | 61005.21 | 0 | 377 | 240 | 0 | 9.875 | 199904 | PR | 201607 | 207 | 3 | 61005.21 | True | 0.00 | 201802 | 201310 | 0.000000 | 2018m2 | 1999m4 | PR | 53.662300 | 0 | 1 | 1 | 1 | 52 | 24.71834 |
| Modified_452568 | S | P | R | C1 | FALSE | False | 1 | 106945.28 | 75.00000 | 0 | 75.00000 | 80208.96 | 0 | 522 | 360 | 0 | 7.000 | 199905 | PR | 201601 | 200 | 3 | 80208.96 | True | 0.00 | 201911 | 201310 | 103.573650 | 2019m11 | 1999m5 | PR | 67.271130 | 0 | 1 | 1 | 1 | 73 | 35.82407 |
| 452342 | S | P | P | C1 | FALSE | False | 1 | 98000.00 | 121.42857 | 0 | 121.42857 | 119000.00 | 0 | 431 | 431 | 578 | 7.500 | 199905 | PR | 201603 | 202 | 3 | 106903.32 | False | 0.00 | 201905 | 201310 | 53.228767 | 2019m5 | 1999m5 | PR | 89.317700 | 0 | 0 | 1 | 1 | 67 | 41.07854 |
| Modified_452571 | S | P | R | C1 | FALSE | False | 1 | 103487.84 | 70.00000 | 0 | 70.00000 | 72441.49 | 0 | 600 | 480 | 610 | 12.000 | 199906 | PR | 201504 | 190 | 3 | 72441.49 | True | 0.00 | 201609 | 201310 | 3.369616 | 2016m9 | 1999m6 | PR | 59.802070 | 0 | 0 | 1 | 1 | 35 | 21.30300 |
| 800484 | C | P | P | C1 | FALSE | False | 1 | 65000.00 | 70.76923 | 0 | 70.76923 | 46000.00 | 0 | 482 | 482 | 0 | 4.875 | 199906 | PR | 201804 | 226 | 3 | 39701.82 | False | 0.00 | 202001 | 201409 | 37.030594 | 2020m1 | 1999m6 | PR | 57.248840 | 0 | 1 | 0 | 1 | 64 | 33.29693 |
| 452321 | S | P | C | C1 | FALSE | False | 1 | 84000.00 | 30.95238 | 0 | 30.95238 | 26000.00 | 0 | 180 | 180 | 491 | 9.000 | 199908 | PR | 201603 | 199 | 3 | 1541.21 | False | 0.00 | 201906 | 201401 | 0.000000 | 2019m6 | 1999m8 | PR | 1.511919 | 0 | 0 | 1 | 1 | 65 | 11.90945 |
| Reperform_452590 | S | P | C | C1 | FALSE | False | 1 | 105000.00 | 65.00000 | 0 | 65.00000 | 68250.00 | 0 | 361 | 361 | 0 | 10.000 | 199909 | PR | 201809 | 228 | 3 | 50829.27 | False | 0.00 | 201905 | 201709 | 0.000000 | 2019m5 | 1999m9 | PR | 41.253260 | 0 | 1 | 1 | 1 | 20 | 10.50160 |
| 800524 | C | P | P | C1 | FALSE | False | 1 | 69000.00 | 57.97102 | 0 | 57.97102 | 40000.00 | 0 | 181 | 360 | 0 | 9.500 | 199911 | PR | 201608 | 201 | 3 | 32607.55 | False | 0.00 | 201805 | 201412 | 20.262178 | 2018m5 | 1999m11 | PR | 44.312710 | 0 | 1 | 0 | 1 | 41 | 21.77328 |
| Modified_452373 | S | P | R | C1 | FALSE | False | 1 | 86000.00 | 70.00000 | 0 | 70.00000 | 60200.00 | 0 | 386 | 240 | 0 | 11.000 | 199911 | PR | 201512 | 193 | 3 | 56506.63 | True | 0.00 | 201609 | 201310 | 0.000000 | 2016m9 | 1999m11 | PR | 58.649680 | 0 | 1 | 1 | 1 | 35 | 20.92853 |
| 143892 | S | P | R | C1 | FALSE | False | 1 | 216000.00 | 50.62593 | 0 | 50.62593 | 109352.00 | 0 | 358 | 358 | 0 | 7.500 | 199911 | PR | 201510 | 191 | 3 | 86421.62 | False | 0.00 | 201603 | 201310 | 0.000000 | 2016m3 | 1999m11 | PR | 36.684860 | 0 | 1 | 1 | 1 | 29 | 11.88146 |
| 452554 | S | P | C | C1 | FALSE | False | 1 | 181000.00 | 75.00000 | 0 | 75.00000 | 135750.00 | 0 | 360 | 360 | 0 | 10.125 | 199911 | PR | 201507 | 188 | 3 | 117406.58 | False | 0.00 | 201608 | 201310 | 14.825404 | 2016m8 | 1999m11 | PR | 57.498480 | 0 | 1 | 1 | 1 | 34 | 20.23618 |
| Modified_144034 | S | P | C | C1 | FALSE | False | 1 | 147000.00 | 71.42857 | 0 | 71.42857 | 105000.00 | 0 | 502 | 360 | 534 | 5.625 | 199912 | PR | 201802 | 218 | 3 | 99843.70 | True | 0.00 | 202001 | 201501 | 26.885721 | 2020m1 | 1999m12 | PR | 66.615550 | 0 | 0 | 1 | 1 | 60 | 31.47362 |
| 452562 | S | P | C | C1 | FALSE | False | 1 | 65000.00 | 64.15385 | 0 | 64.15385 | 41700.00 | 0 | 360 | 360 | 0 | 9.125 | 199912 | PR | 201403 | 171 | 3 | 35459.29 | False | 0.00 | 201412 | 201310 | 52.056881 | 2014m12 | 1999m12 | PR | 48.481660 | 0 | 1 | 1 | 1 | 14 | 10.94089 |
| 800546 | C | P | P | C1 | FALSE | False | 1 | 193255.41 | 80.00000 | 0 | 80.00000 | 154604.33 | 0 | 525 | 525 | 0 | 5.500 | 200003 | PR | 201707 | 208 | 3 | 153707.55 | False | 0.00 | 201810 | 201506 | 28.435528 | 2018m10 | 2000m3 | PR | 74.610400 | 0 | 1 | 0 | 1 | 40 | 31.30027 |
| Modified_450023 | S | P | C | C1 | FALSE | False | 1 | 71980.83 | 76.92308 | 0 | 76.92308 | 55369.87 | 0 | 483 | 360 | 624 | 9.000 | 200006 | PR | 201604 | 190 | 3 | 55206.09 | True | 0.00 | 201907 | 201401 | 67.394905 | 2019m7 | 2000m6 | PR | 67.094190 | 0 | 0 | 1 | 1 | 66 | 33.53873 |
| 450046 | S | P | C | C1 | FALSE | False | 1 | 95000.00 | 78.83158 | 0 | 78.83158 | 74890.00 | 0 | 425 | 425 | 0 | 10.500 | 200009 | PR | 201504 | 175 | 3 | 71331.30 | False | 0.00 | 201512 | 201310 | 38.315578 | 2015m12 | 2000m9 | PR | 71.567990 | 0 | 1 | 1 | 1 | 26 | 22.26198 |
| Modified_145171 | S | P | P | C1 | FALSE | False | 1 | 145794.67 | 85.00000 | 0 | 85.00000 | 123925.47 | 0 | 600 | 480 | 610 | 7.500 | 200009 | PR | 201610 | 193 | 3 | 123008.00 | True | -5151.00 | 202001 | 201507 | 49.462742 | 2020m1 | 2000m9 | PR | 87.502060 | 0 | 0 | 1 | 1 | 54 | 36.35113 |
| 800606 | S | P | C | C1 | FALSE | False | 1 | 220000.00 | 63.50409 | 0 | 63.50409 | 139709.00 | 0 | 229 | 229 | 0 | 6.500 | 200010 | PR | 201805 | 211 | 3 | 55702.33 | False | 0.00 | 201903 | 201506 | 0.000000 | 2019m3 | 2000m10 | PR | 23.828150 | 0 | 1 | 1 | 1 | 45 | 12.79587 |
| 145707 | S | S | P | C1 | FALSE | False | 1 | 340000.00 | 76.25000 | 0 | 76.25000 | 259250.00 | 0 | 394 | 394 | 661 | 6.250 | 200103 | PR | 201611 | 188 | 3 | 220821.59 | False | 0.00 | 201805 | 201310 | 20.750235 | 2018m5 | 2001m3 | PR | 66.732020 | 0 | 0 | 1 | 0 | 55 | 33.84868 |
lgd
theme_set(theme_minimal())
library(plotly)
(lgd |>
ggplot(aes(lgd)) +
geom_histogram(colour="black",
aes(y = ..count.. , fill = ..count..),
show.legend = FALSE,
bins = 30)+
scale_fill_gradient("Count", low="white", high="forestgreen")+
xlab("Loss Given Default") ) |>
ggplotly()Variables Numéricas
library(gridExtra)
theme_set(theme_minimal())
df <- lgd
p1 <- df %>%
ggplot(aes(lgd)) +
geom_histogram(colour="black",
aes(y=..count..,fill=..count..),
show.legend = FALSE,
bins = 30)+
scale_fill_gradient("Count", low="white", high="forestgreen")+
xlab("Loss Given Default")
p2 <- df %>%
ggplot(aes(updatedcltv)) +
geom_histogram(colour="black",
aes(y=..count..,fill=..count..),
show.legend = FALSE,
bins = 30)+
scale_fill_gradient("Count", low="white", high="forestgreen")+
xlab("Updated CLTV")
p3 <- df %>%
ggplot(aes(timetoliquidation)) +
geom_histogram(colour="black",
aes(y=..count..,fill=..count..),
show.legend = FALSE,
bins = 30)+
scale_fill_gradient("Count", low="white", high="forestgreen")+
xlab("Time to Liquidation")
grid.arrange(p1,p2,p3,ncol=3)df1 <- lgd
p4 <- df1 %>%
ggplot(aes(updatedcltv, lgd))+
geom_point(alpha = 0.5, color = 'forestgreen')+
geom_smooth(method='lm')
p5 <- df1 %>%
ggplot(aes(updatedcltv, lgd, colour = as.factor(updatedcltv_missing)))+
geom_point(alpha = 0.5)+
labs(colour="Upcltv \nMissing")
grid.arrange(p4,p5,ncol=2)df1 %>%
ggplot(aes(timetoliquidation, lgd))+
geom_point(alpha = 0.5, color = 'forestgreen')+
geom_smooth(method='lm')p6 <- df1 %>%
ggplot(aes(singlehome, lgd, fill=as.factor(singlehome), colour = as.factor(singlehome)))+
geom_boxplot(alpha = 0.3, show.legend = FALSE)
p7 <- df1 %>%
ggplot(aes(occupied, lgd, fill = as.factor(occupied), colour = as.factor(occupied)))+
geom_boxplot(alpha = 0.3, show.legend = FALSE)
grid.arrange(p6,p7,ncol=2)library(tidymodels)
df1 <- df %>% select(lgd,
updatedcltv_missing,
updatedcltv,
timetoliquidation,
occupied,
singlehome)
lm_spec <- linear_reg() %>%
set_mode("regression") %>%
set_engine("lm")
lm_fit <- lm_spec %>%
fit(lgd ~ updatedcltv_missing +
updatedcltv+
timetoliquidation+
occupied+
singlehome,
data = df1)
lm_fit %>%
tidy() %>%
select(term,estimate, p.value) %>%
kable(booktabs = TRUE,
align = 'lrr',
linesep = "",
caption = "Estimated Coefficients",
format.args = list(big.mark = ",",digits = 3)) %>%
row_spec(seq(1,6,2), background="cyan") %>%
kable_styling(full_width=FALSE)| term | estimate | p.value |
|---|---|---|
| (Intercept) | -1.746 | 0.62349 |
| updatedcltv_missing | 32.855 | 0.00000 |
| updatedcltv | 0.325 | 0.00000 |
| timetoliquidation | 0.318 | 0.00000 |
| occupied | -3.929 | 0.02456 |
| singlehome | -3.594 | 0.00721 |
\[LGD =-1.746+32.855*updatedcltv\_missing+0.325*updatedcltv+\] \[0.318*timetoliquidation -3.929*occupied-3.594*singlehome\]
x <- glance(lm_fit) %>% select(c(1,8,9))
lm_model <- lm(lgd ~ updatedcltv_missing +
updatedcltv+
timetoliquidation+
occupied+
singlehome,
data = df1)
library(modelr)
MAE <- mae(lm_model,df1)
MSE <- mse(lm_model,df1)
AIC <- x$AIC
BIC <- x$BIC
R2 <- x$r.squared
x <- as_tibble(cbind(MAE,AIC,BIC,MSE,R2))
x %>% kable(booktabs = TRUE,
align = 'ccccc',
linesep = "",
caption = "Evaluation Metrics",
format.args = list(big.mark = ",",digits = 4, scientific=FALSE)) %>%
row_spec(1,background="cyan") %>%
kable_styling(full_width=FALSE)| MAE | AIC | BIC | MSE | R2 |
|---|---|---|---|---|
| 15.09 | 9,602 | 9,637 | 354.4 | 0.1643 |
Comparación agrupado por liquidationdate
df %>%
select(liquidationdate,lgd,predlgd) %>%
group_by(liquidationdate) %>%
summarise(lgd_mean = mean(lgd),
predlgd_mean = mean(predlgd)) %>%
mutate(liquidationdate = as.Date(paste(liquidationdate,01), format = "%Y%m%d")) %>%
ggplot(aes(liquidationdate))+
geom_line(aes(y= lgd_mean, color = "Observed"),lwd =1.3,alpha=0.7)+
geom_line(aes(y= predlgd_mean,color='Forecast'),lwd=1.3,alpha=0.7)+
ylim(0,60)+
scale_x_date(breaks = scales::breaks_pretty(20))+
theme(axis.text.x=element_text(angle=90,hjust=1),
plot.title = element_text(hjust = 0.5))+
labs(x = "Liquidation Date", y = "LGD (%)", color = "Legend")+
ggtitle("LGD - All Loans")detach("package:modelr", unload=TRUE)
#etach_package("modelr", TRUE)
library(doParallel)
library(xgboost)
df1 <- df %>% select(lgd,
fico,
loanage,
updatedcltv,
timetoliquidation,
occupied,
singlehome)
lgd_train <- df1
# Define Xgboost Model
xgb_spec <- boost_tree(
trees = 1000,
tree_depth = tune(),
min_n = tune(),
loss_reduction = tune(),
sample_size = tune(),
mtry = tune(),
learn_rate = tune(),
) %>%
set_engine("xgboost", objective = "reg:squarederror") %>%
set_mode("regression")
# Define Grid: Space filling Design
xgb_grid <- grid_latin_hypercube(
tree_depth(),
min_n(),
loss_reduction(),
sample_size = sample_prop(),
finalize(mtry(), lgd_train),
learn_rate(),
size = 90
)
# Define Workflow
xgb_wf <- workflow() %>%
add_formula(lgd ~ .) %>%
add_model(xgb_spec)
# Define CV Folds
set.seed(123)
lgd_folds <- vfold_cv(lgd_train, strata = lgd)
# High Performance Computation
all_cores <- parallel::detectCores()
registerDoParallel(cores = all_cores)
# Tuning: Grid Search
set.seed(234)
xgb_res <- tune_grid(
xgb_wf,
resamples = lgd_folds,
grid = xgb_grid,
control = control_grid(save_pred = TRUE, verbose =TRUE),
metrics = metric_set(rmse, rsq, mae)
)Hyperparameters and Evaluation metrics
Feature Importance Plot
Evaluation Metrics
| MAE | MSE | R2 |
|---|---|---|
| 14.54 | 327.8 | 0.2371 |
Comparison
Explanation based on SHAP values